<?php
/**
 * Created by PhpStorm.
 * User: wangjie
 * Date: 2018/9/18
 * Time: 15:55
 */

/**
 * One-shot crawler that copies car specification data from autohome.com.cn
 * into the local `auto_info` table.
 *
 * For every row of `auto_model` whose status is 0 the series configuration
 * page is downloaded, the JSON blob assigned to the page's `config` variable
 * is extracted and flattened into one positional row per trim, and the rows
 * are inserted into `auto_info`. The model is then marked 1 (imported) or
 * -1 (nothing usable could be pulled) so that it is not fetched again.
 */
class demo
{
    /**
     * Download a URL with cURL and return the response body.
     *
     * Redirects are followed, HTTP status codes >= 400 count as failures and
     * the whole transfer is limited to 5 seconds.
     *
     * @param string $url Absolute URL to fetch.
     * @return mixed Response body as a string, or false on any failure.
     */
    public function curl($url)
    {
        $ch = curl_init();
        if ($ch === false) {
            return false;
        }

        $options = [
            CURLOPT_URL            => $url,
            CURLOPT_FAILONERROR    => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_AUTOREFERER    => true,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT        => 5,
        ];
        // URL schemes are case-insensitive, so compare without regard to case.
        if (strncasecmp((string)$url, 'https://', 8) === 0) {
            // NOTE(review): peer verification is switched off, so any
            // certificate is accepted and the connection can be intercepted.
            // Left as-is because enabling it needs a CA bundle configured on
            // the host; turn it on once curl.cainfo is set up.
            $options[CURLOPT_SSL_VERIFYPEER] = false;
            // Still require the certificate to name the requested host.
            $options[CURLOPT_SSL_VERIFYHOST] = 2;
        }
        curl_setopt_array($ch, $options);

        $content = curl_exec($ch);
        curl_close($ch);

        return $content;
    }

    /**
     * Main entry point: import the specifications of every pending model.
     *
     * Everything runs in a single pass with no attention paid to performance.
     * The inserts for a model and its status update are committed together,
     * so an interrupted run never leaves rows behind for a model that is
     * still marked as pending (which would be duplicated on the next run).
     *
     * @return void
     * @throws PDOException When the connection or any statement fails.
     */
    public function index()
    {
        // Connect to the database.
        $pdo = new PDO("mysql:host=localhost;dbname=seek;port=3306;charset=utf8", "root", "123456");
        // Report database errors as exceptions.
        $pdo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

        // Models with status 0 have not been pulled yet. The flag lets an
        // interrupted run (for example a CGI request hitting its time limit)
        // resume where it stopped.
        $pendingModels = $pdo->query('
                    SELECT
                        `model`.`id`,
                        `brand`.`first_letter`,
                        `model`.`brand_id`,
                        `model`.`name` AS car_name,
                        `brand`.`name` AS brand_name,
                        `model`.`status`
                    FROM
                        auto_brand AS brand
                    LEFT JOIN auto_model AS model ON model.brand_id = brand.id
                    WHERE `model`.`status` = 0
                    ORDER BY
                        `brand`.`first_letter` ASC,
                        `model`.`brand_id` ASC,
                        `model`.`id` ASC
        ')->fetchAll(PDO::FETCH_ASSOC);

        // Both statements are prepared once and re-executed for every row.
        // The column list comes from the same map that builds the bound
        // parameters, so the two cannot drift apart.
        $columns = array_merge(['model_id', 'name'], array_keys($this->infoFieldMap()));
        $insertInfo = $pdo->prepare(
            'insert into auto_info (`' . implode('`,`', $columns) . '`) values (:' . implode(', :', $columns) . ')'
        );
        $updateStatus = $pdo->prepare('update auto_model set `status` = :status where id = :id');

        foreach ($pendingModels as $model) {
            $rows = $this->fetchCarRows($model['id']);
            // -1: the site offered nothing usable for this model; 1: imported.
            $status = empty($rows) ? -1 : 1;

            $pdo->beginTransaction();
            try {
                foreach ($rows as $row) {
                    $insertInfo->execute($this->buildInsertParams($model, $row));
                }
                $updateStatus->execute([':status' => $status, ':id' => $model['id']]);
                $pdo->commit();
            } catch (\Throwable $e) {
                $pdo->rollBack();
                throw $e;
            }
        }
        echo "Script execution completed \n";
    }

    /**
     * Download one series page and turn it into positional rows, one per trim.
     *
     * @param int|string $modelId Series id used in the configuration page URL.
     * @return array Rows as produced by carDataArrayHandle(); empty when the
     *               page could not be downloaded or did not contain a
     *               decodable config block.
     */
    private function fetchCarRows($modelId)
    {
        // The id appended to the URL is the series id used by the remote site.
        $html = $this->curl("https://car.autohome.com.cn/config/series/{$modelId}.html");
        if (!is_string($html) || $html === '') {
            return [];
        }

        // The data lives in a script fragment of the form
        // "config = {...}; var option"; isolate that fragment first.
        if (!preg_match('/config[\s\S]*?var option/i', $html, $match)) {
            return [];
        }
        $json = $this->cut('config = ', ' var option', $match[0]);
        // Surrounding whitespace and the statement terminator would make
        // json_decode() fail, so strip them.
        $json = rtrim(trim($json), ";\\");

        $decoded = json_decode($json, true);
        if (!is_array($decoded)) {
            return [];
        }

        return $this->carDataArrayHandle($decoded);
    }

    /**
     * Map of `auto_info` columns to the positional field they are read from.
     *
     * Each entry is [index into a row from carDataArrayHandle(), cast to int?].
     * The indexes follow the field legend documented in carDataArrayHandle().
     *
     * @return array
     */
    private function infoFieldMap()
    {
        return [
            'content'      => [0, false],
            'displacement' => [35, false],
            'seating'      => [29, true],
            'drive'        => [60, false],
            'energy'       => [50, false],
            'price'        => [1, false],
            'level_info'   => [3, false],
            'length'       => [20, true],
            'width'        => [21, true],
            'height'       => [22, true],
            'wheelbase'    => [23, true],
            'highest_km'   => [13, true],
            'tank'         => [30, true],
            'cylinder'     => [38, true],
            'engine_type'  => [33, false],
            'horsepower'   => [44, true],
            'power'        => [7, true],
            'speed'        => [46, false],
            'torque'       => [8, true],
            'torque_speed' => [48, false],
            // Previously read index 48, duplicating torque_speed. Index 17 is
            // the combined fuel consumption in the legend - TODO confirm that
            // this is the value the oil_wear column is meant to hold.
            'oil_wear'     => [17, false],
            'list_time'    => [5, false],
        ];
    }

    /**
     * Build the bound parameters for one `auto_info` insert.
     *
     * Missing fields fall back to '' (or 0 for integer columns). The default
     * is applied before the integer cast; casting first would make the
     * fallback unreachable and raise a notice for every missing index.
     *
     * @param array $model Row of the pending-model query (id, car_name, ...).
     * @param array $row   Positional field list for one trim.
     * @return array Parameter name => value.
     */
    private function buildInsertParams($model, $row)
    {
        $params = [
            ':model_id' => $model['id'],
            ':name'     => $model['car_name'] ?? '',
        ];
        foreach ($this->infoFieldMap() as $column => $spec) {
            list($index, $isInt) = $spec;
            $params[':' . $column] = $isInt
                ? (int)($row[$index] ?? 0)
                : ($row[$index] ?? '');
        }

        return $params;
    }

    /**
     * Flatten the decoded config JSON into one positional field list per trim.
     *
     * The first six entries of result.paramtypeitems are read in order:
     * 0 basic parameters, 1 body, 2 engine, 3 gearbox, 4 chassis/steering,
     * 5 wheels/brakes. Their values are concatenated, so a field is identified
     * purely by its position:
     *
     *  0 trim name, 1 list price, 2 manufacturer, 3 class, 4 energy type,
     *  5 launch date, 6 official electric range (km), 7 max power (kW),
     *  8 max torque (N.m), 9 engine, 10 gearbox, 11 length*width*height,
     *  12 body structure, 13 top speed (km/h), 14 official 0-100 km/h (s),
     *  15 measured 0-100 km/h (s), 16 measured 100-0 km/h braking (m),
     *  17 official combined fuel consumption (L/100km),
     *  18 measured fuel consumption (L/100km), 19 warranty, 20 length,
     *  21 width, 22 height, 23 wheelbase, 24 front track, 25 rear track,
     *  26 ground clearance, 27 body structure, 28 doors, 29 seats,
     *  30 fuel tank capacity, 31 boot capacity, 32 kerb weight,
     *  33 engine model, 34 displacement (ml), 35 displacement (L),
     *  36 aspiration, 37 cylinder layout, 38 cylinders, 39 valves per
     *  cylinder, 40 compression ratio, 41 valve train, 42 bore (mm),
     *  43 stroke (mm), 44 max horsepower (ps), 45 max power (kW),
     *  46 max power rpm, 47 max torque (N.m), 48 max torque rpm,
     *  49 engine technology, 50 fuel type, 51 fuel grade, 52 fuel supply,
     *  53 cylinder head material, 54 cylinder block material,
     *  55 emission standard, 56 gears, 57 gearbox type, 58 gearbox short
     *  name, 59 drive mode, 60 four-wheel-drive type, 61 centre differential,
     *  62 front suspension, 63 rear suspension, 64 steering assist,
     *  65 body construction, 66 front brakes, 67 rear brakes,
     *  68 parking brake, 69 front tyres, 70 rear tyres, 71 spare tyre.
     *
     * Sections that are absent are skipped, leaving shorter rows instead of
     * failing.
     *
     * @param mixed $result Decoded config JSON.
     * @return array One list of field values per trim; empty when the expected
     *               structure is not present.
     */
    public function carDataArrayHandle($result)
    {
        $sections = $result['result']['paramtypeitems'] ?? [];
        if (!is_array($sections)) {
            return [];
        }

        // Only the first six sections take part in the positional layout.
        $sectionCount = 6;
        $carInfoArr = [];
        for ($i = 0; $i < $sectionCount; $i++) {
            $paramItems = $sections[$i]['paramitems'] ?? null;
            if (!is_array($paramItems)) {
                continue;
            }
            $carInfoArr = $this->carInfo($paramItems, $carInfoArr);
        }

        return $carInfoArr;
    }

    /**
     * Append the values of one parameter section to the per-trim rows.
     *
     * Every parameter carries one value per trim. The values are transposed
     * so that each trim collects its own list, HTML entities are decoded and
     * wrapping <span> tags are removed (their inner text is kept).
     *
     * @param mixed $bodyInfo   The section's "paramitems" list.
     * @param array $carInfoArr Rows accumulated from earlier sections.
     * @return array Rows extended with this section's values.
     */
    public function carInfo($bodyInfo, $carInfoArr)
    {
        $perTrim = [];
        if (is_array($bodyInfo)) {
            foreach ($bodyInfo as $param) {
                $valueItems = $param['valueitems'] ?? [];
                if (!is_array($valueItems)) {
                    continue;
                }
                foreach ($valueItems as $trimIndex => $item) {
                    $raw = $item['value'] ?? '';
                    $text = htmlspecialchars_decode(is_scalar($raw) ? (string)$raw : '');
                    $stripped = preg_replace("/<span[^>]*>(.*?)<\/span>/is", "$1", $text);
                    // preg_replace() yields null on a PCRE error; keep the
                    // unstripped text in that case.
                    $perTrim[$trimIndex][] = $stripped ?? $text;
                }
            }
        }

        return $this->arrayMerge($carInfoArr, $perTrim);
    }

    /**
     * Concatenate two sets of per-trim rows.
     *
     * When $mainArr is empty $pushArr is returned unchanged. Otherwise every
     * row of $mainArr is extended with the row stored under the same key in
     * $pushArr (or with nothing when that key is absent), and the result is
     * re-indexed from 0.
     *
     * @param array $mainArr Rows collected so far.
     * @param array $pushArr Rows to append.
     * @return array
     */
    public function arrayMerge($mainArr, $pushArr)
    {
        if (empty($mainArr)) {
            return $pushArr;
        }

        $list = [];
        foreach ($mainArr as $ke => $vo) {
            $list[] = array_merge((array)$vo, (array)($pushArr[$ke] ?? []));
        }

        return $list;
    }

    /**
     * Return the text between two markers.
     *
     * The end marker is searched for after the begin marker, so an earlier
     * occurrence of it cannot produce a negative length.
     *
     * @param string $begin Marker preceding the wanted text.
     * @param string $end   Marker following the wanted text.
     * @param string $str   Text to search.
     * @return string Text between the markers, or '' when either is missing.
     */
    public function cut($begin, $end, $str)
    {
        $beginPos = mb_strpos($str, $begin);
        if ($beginPos === false) {
            return '';
        }

        $start = $beginPos + mb_strlen($begin);
        $endPos = mb_strpos($str, $end, $start);
        if ($endPos === false) {
            return '';
        }

        return mb_substr($str, $start, $endPos - $start);
    }
}

// Script entry point: create the crawler (kept in $crawl) and start the import immediately.
($crawl = new demo())->index();